Introduce a product classifier

akatsoulas · akatsoulas · commit 8c88f3601a09 · 2025-05-27T16:47:28.000+03:00
diff --git a/kitsune/llm/questions/classifiers.py b/kitsune/llm/questions/classifiers.py
@@ -3,7 +3,14 @@
 from django.db import models
 from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
 
-from kitsune.llm.questions.prompt import spam_parser, spam_prompt, topic_parser, topic_prompt
+from kitsune.llm.questions.prompt import (
+    product_parser,
+    product_prompt,
+    spam_parser,
+    spam_prompt,
+    topic_parser,
+    topic_prompt,
+)
 from kitsune.llm.utils import get_llm
 from kitsune.products.utils import get_taxonomy
 
@@ -39,32 +46,79 @@ def classify_question(question: "Question") -> dict[str, Any]:
     }
 
     spam_detection_chain = spam_prompt | llm | spam_parser
+    product_classification_chain = product_prompt | llm | product_parser
     topic_classification_chain = topic_prompt | llm | topic_parser
 
+    def handle_spam(payload: dict[str, Any], spam_result: dict[str, Any]) -> dict[str, Any]:
+        """Pick the moderation action for a question the spam chain flagged as spam.
+
+        The spam confidence is bucketed into SPAM, FLAG_REVIEW or NOT_SPAM. Only the SPAM
+        bucket triggers product reclassification: when the product chain returns a product
+        that differs from payload["product"], the question is treated as misfiled rather
+        than spam and is topic-classified against the new product's taxonomy.
+
+        Note: payload["product"] and payload["topics"] are overwritten in place when the
+        question is reassigned.
+
+        Returns a dict with "action" and "product_result", plus "topic_result" whenever
+        the resulting action is NOT_SPAM.
+        """
+        confidence = spam_result.get("confidence", 0)
+        if confidence >= HIGH_CONFIDENCE_THRESHOLD:
+            action = ModerationAction.SPAM
+        elif confidence > LOW_CONFIDENCE_THRESHOLD:
+            action = ModerationAction.FLAG_REVIEW
+        else:
+            action = ModerationAction.NOT_SPAM
+
+        if action == ModerationAction.NOT_SPAM:
+            # A low-confidence spam verdict is handled as a legitimate question, so it still
+            # needs a topic, just like a question that was never flagged.
+            return {
+                "action": action,
+                "product_result": {},
+                "topic_result": topic_classification_chain.invoke(payload),
+            }
+
+        if action == ModerationAction.FLAG_REVIEW:
+            return {"action": action, "product_result": {}}
+
+        product_result = product_classification_chain.invoke(payload)
+        new_product = product_result.get("product")
+
+        if not new_product or new_product == payload["product"]:
+            # No reassignment proposed: the spam verdict stands.
+            return {"action": ModerationAction.SPAM, "product_result": product_result}
+
+        payload["product"] = new_product
+        payload["topics"] = get_taxonomy(
+            new_product, include_metadata=["description", "examples"], output_format="JSON"
+        )
+        return {
+            "action": ModerationAction.NOT_SPAM,
+            "product_result": product_result,
+            "topic_result": topic_classification_chain.invoke(payload),
+        }
+
     def decision_lambda(payload: dict[str, Any]) -> dict[str, Any]:
+        """Route the spam verdict to spam handling or straight to topic classification."""
         spam_result: dict[str, Any] = payload["spam_result"]
-        confidence: int = spam_result.get("confidence", 0)
         is_spam: bool = spam_result.get("is_spam", False)
-        result = {
+
+        # Defaults describe a legitimate question; the spam path overrides "action".
+        base_result = {
             "action": ModerationAction.NOT_SPAM,
             "spam_result": spam_result,
+            "product_result": {},
             "topic_result": {},
         }
 
         if is_spam:
-            match confidence:
-                case _ if confidence >= HIGH_CONFIDENCE_THRESHOLD:
-                    result["action"] = ModerationAction.SPAM
-                case _ if (
-                    confidence > LOW_CONFIDENCE_THRESHOLD
-                    and confidence < HIGH_CONFIDENCE_THRESHOLD
-                ):
-                    result["action"] = ModerationAction.FLAG_REVIEW
-
-        if result["action"] == ModerationAction.NOT_SPAM:
-            result["topic_result"] = topic_classification_chain.invoke(payload)
-
-        return result
+            spam_handling = handle_spam(payload, spam_result)
+            return {**base_result, **spam_handling}
+
+        topic_result = topic_classification_chain.invoke(payload)
+        return {**base_result, "topic_result": topic_result}
 
     pipeline = RunnablePassthrough.assign(spam_result=spam_detection_chain) | RunnableLambda(
         decision_lambda
diff --git a/kitsune/llm/questions/prompt.py b/kitsune/llm/questions/prompt.py
@@ -34,6 +34,49 @@
 {format_instructions}
 """
 
+PRODUCT_INSTRUCTIONS = """
+# Role and Goal
+You are a specialized product reclassification agent for Mozilla's support forums.
+Your task is to evaluate user-submitted questions previously flagged as spam and determine
+if they should instead be reassigned to a specific Mozilla product category.
+
+# Available Mozilla Products
+You MUST select exactly one product from the following JSON-formatted list if reassignment is appropriate:
+- **title**: Name of the product.
+- **description**: A short description of the product.
+
+```json
+{products}
+```
+
+# When to Reassign a Question
+Reassign a question to a specific product ONLY if **all** of these criteria apply:
+- The question explicitly mentions or clearly relates to the product's distinctive features or functionalities.
+- The question includes technical terms, error messages, or workflows unique to the specific product.
+- You are highly confident the original spam classification resulted from incorrect product selection.
+- The content represents a legitimate support request, not promotional or spam content.
+
+# When NOT to Reassign
+Do NOT reassign the question if **any** of these criteria apply:
+- The content is genuinely promotional, spam, inappropriate, or clearly unrelated to Mozilla products.
+- You cannot confidently determine the relevant Mozilla product.
+- The question equally involves multiple Mozilla products with no clear primary focus.
+- The original spam classification appears correct, regardless of product selection.
+
+# Task Instructions
+Given a user-submitted question previously flagged as spam, strictly follow these steps:
+1. **Carefully Evaluate** whether the question clearly relates to a specific Mozilla product.
+2. **Spam Verification** - Confirm explicitly that the content is not promotional or actual spam.
+3. **Determine Reassignment:** If the question meets **all** reassignment criteria, explicitly select the most appropriate product. Otherwise, do not reassign.
+4. Indicate your **confidence** in your decision (0-100), with higher scores indicating stronger certainty:
+   - `0` = Extremely uncertain.
+   - `100` = Completely certain.
+5. Provide a concise explanation (1–2 sentences) clearly supporting your decision.
+
+# Response Format
+{format_instructions}
+"""
+
 TOPIC_INSTRUCTIONS = """
 # Role and goal
 You are a content classification agent specialized in Mozilla's "{product}" product support forums.
@@ -119,6 +162,34 @@
     )
 )
 
+product_parser = StructuredOutputParser.from_response_schemas(
+    (
+        ResponseSchema(
+            name="product",
+            type="str",
+            description=(
+                "The Mozilla product selected for reassignment or null if no reassignment"
+                " should be made."
+            ),
+        ),
+        ResponseSchema(
+            name="confidence",
+            type="int",
+            description=(
+                "An integer from 0 to 100 that indicates the level of confidence in the"
+                " product reassignment decision, with 0 representing the lowest confidence"
+                " and 100 the highest."
+            ),
+        ),
+        ResponseSchema(
+            name="reason",
+            type="str",
+            description="The reason for reassigning to the selected product"
+            " or for not reassigning.",
+        ),
+    )
+)
+
 
 spam_prompt = ChatPromptTemplate(
     (
@@ -134,3 +205,11 @@
         ("human", USER_QUESTION),
     )
 ).partial(format_instructions=topic_parser.get_format_instructions())
+
+
+product_prompt = ChatPromptTemplate(
+    (
+        ("system", PRODUCT_INSTRUCTIONS),
+        ("human", USER_QUESTION),
+    )
+).partial(format_instructions=product_parser.get_format_instructions())